//===------------------------------ SourceComparator.h -----------*- C++-*-===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2014 - 2017 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//

// Declares the code needed to essentially "diff" the previously-compiled and
// possibly-about-to-be-compiled versions of a source file.
// Unlike Unix diff, it can go down to the granularity of characters within a
// line.

#ifndef POLARPHP_DRIVER_SOURCECOMPARATOR_H
#define POLARPHP_DRIVER_SOURCECOMPARATOR_H

#include "polarphp/ast/IncrementalRanges.h"
#include "llvm/Support/MemoryBuffer.h"
#include <string>
#include <unordered_map>
#include <vector>

namespace polar {
namespace incremental_ranges {

class SourceComparator {

public:
   //==============================================================================
   // MARK: Packaging indices for both sides
   //==============================================================================

   /// It is useful to have structures that hold both the left- and
   /// right-hand-sides of a "diff" input Describes a side:
   enum class Side : size_t { L = 0, R = 1 };

   /// A general class to hold something that exists on both sides:
   template <typename T> struct LR {
      T contents[2];
      /// Get the contents on a particular side
      T &operator[](const Side side) { return contents[size_t(side)]; }
      const T &operator[](const Side side) const {
         return contents[size_t(side)];
      }

      /// Get a side by name:
      // TODO: Could get ride of these and just use the subscripting above
      T &lhs() { return contents[size_t(Side::L)]; }
      T &rhs() { return contents[size_t(Side::R)]; }

      const T &lhs() const { return contents[size_t(Side::L)]; }
      const T &rhs() const { return contents[size_t(Side::R)]; }
      LR(T lhs, T rhs) {
         contents[0] = lhs;
         contents[1] = rhs;
      }
      bool operator==(const LR &other) {
         return lhs() == other.lhs() && rhs() == other.rhs();
      }
   };

   /// A structure holding size_t's on both sides.
   /// Zero-origin
   template <typename Derived> struct Indices : public LR<size_t> {
      Indices(size_t lhs, size_t rhs) : LR(lhs, rhs) {}
      Derived operator+(size_t x) const { return Derived(lhs() + x, rhs() + x); }
      Derived operator-(size_t x) const { return Derived(lhs() - x, rhs() - x); }
      size_t min() const { return std::min(lhs(), rhs()); }
      bool operator<(const Derived &other) const {
         return lhs() < other.lhs() && rhs() < other.rhs();
      }
      bool operator<=(const Derived &other) const {
         return lhs() <= other.lhs() && rhs() <= other.rhs();
      }
   };
   /// Character indices into a line.
   /// Zero-origin
   struct CharIndices : public Indices<CharIndices> {
      CharIndices(size_t lhs, size_t rhs) : Indices(lhs, rhs) {}
      CharIndices &operator++() {
         ++lhs(), ++rhs();
         return *this;
      }
      CharIndices &operator--() {
         --lhs(), --rhs();
         return *this;
      }
      bool bothNonZero() const { return lhs() != 0 && rhs() != 0; }
   };

   /// Indices into a collection of lines.
   /// Zero-origin
   struct LineIndices : public Indices<LineIndices> {
      LineIndices() : Indices(~0, ~0) {}
      LineIndices(size_t lhs, size_t rhs) : Indices(lhs, rhs) {}
      LineIndices &operator=(const LineIndices &) = default;
      LineIndices &operator++() {
         ++lhs(), ++rhs();
         return *this;
      }
      LineIndices &operator--() {
         --lhs(), --rhs();
         return *this;
      }
   };

   /// Strings on both sides.
   struct StringRefs : LR<StringRef> {
      StringRefs(StringRef lhs, StringRef rhs) : LR(lhs, rhs) {}
      CharIndices size() const { return CharIndices(lhs().size(), rhs().size()); }
      /// Do the strings match at the provided character indices?
      bool matchAt(const CharIndices &indices) const {
         return lhs()[indices.lhs()] == rhs()[indices.rhs()];
      }
   };

   /// Optional strings on both sides
   struct OptStringRefs : LR<Optional<StringRef>> {
      OptStringRefs(Optional<StringRef> lhs, Optional<StringRef> rhs)
      : LR(lhs, rhs) {}
   };

   /// A half-open span of lines on both sides, from start up to but not
   /// including end.
   struct LineSpan {
      /// [start, end)
      LineIndices start, end;

      LineSpan(LineIndices start, LineIndices end) : start(start), end(end) {}
      bool operator==(const LineSpan &rhs) {
         return start == rhs.start && end == rhs.end;
      }
      bool bothNonEmpty() const {
         return start.lhs() < end.lhs() && start.rhs() < end.rhs();
      }
      bool eitherNonEmpty() const {
         return start.lhs() < end.lhs() || start.rhs() < end.rhs();
      }
      bool bothEmpty() const { return !eitherNonEmpty(); }

      LineIndices size() const {
         return LineIndices(end.lhs() - start.lhs(), end.rhs() - start.rhs());
      }
   };

   /// A pair of collections of lines, one on each side
   struct Lines : LR<std::vector<StringRef>> {
      Lines(std::vector<StringRef> lhs, std::vector<StringRef> rhs)
         : LR(lhs, rhs) {}

      bool matchAt(LineIndices where) const {
         return lhs()[where.lhs()] == rhs()[where.rhs()];
      }
      StringRefs operator[](const LineIndices &indices) const {
         return StringRefs(lhs()[indices.lhs()], rhs()[indices.rhs()]);
      }
      /// Cannot use subscripting to get a particular side here because used it
      /// above for something else. So use a named function.
      const std::vector<StringRef> &side(Side s) const {
         return s == Side::L ? lhs() : rhs();
      }
      LineIndices size() const { return LineIndices(lhs().size(), rhs().size()); }

      CharIndices sizeOfLastLines() const {
         return CharIndices(lhs().empty() ? 0 : lhs().back().size(),
                            rhs().empty() ? 0 : rhs().back().size());
      }
   };

   /// A pair of SerializableSourceRanges, one per side.
   /// (SerializableSourceRanges are 1-origin, and semi-open.)
   struct LRSerializableRange : LR<SerializableSourceRange> {
      LRSerializableRange(const SerializableSourceRange &lhs,
                          const SerializableSourceRange &rhs)
         : LR(lhs, rhs) {}
   };
   /// A pair of vectors of SerializableSourceRanges, one per side.
   struct LRRanges : LR<Ranges> {
      LRRanges() : LRRanges({}, {}) {}
      LRRanges(Ranges &&lhs, Ranges &&rhs) : LR(std::move(lhs), std::move(rhs)) {}
      static LRRanges wholeFile() {
         return {SerializableSourceRange::RangesForWholeFile(),
                 SerializableSourceRange::RangesForWholeFile()};
      }
      void push_back(const LRSerializableRange &range) {
         lhs().push_back(range.lhs());
         rhs().push_back(range.rhs());
      }
      bool empty() const {
         assert(lhs().empty() == rhs().empty() && "diff should produce at least "
                                                  "empty ranges for an addition "
                                                  "to the other side");
         return lhs().empty();
      }
   };

private:
   /// The inputs, separated into lines
   Lines linesToCompare;

   /// The trimmed input regions to compare, after cutting away identical
   /// beginnings and endings.
   LineSpan regionsToCompare;

   /// If line i on the left matches line j on the right, match[i] == j.
   /// If line i on the left has no match, match[i] == None.
   std::vector<Optional<size_t>> matches;

   /// Chains matching lines together (I think)
   struct PRLink {
      LineIndices lines;
      Optional<size_t> next;
      PRLink(LineIndices lines, const Optional<size_t> next)
         : lines(lines), next(next) {}
      PRLink() : PRLink(LineIndices(), None) {}
   };


   //==============================================================================
   // MARK: SortedSequence
   //==============================================================================
private:
   class SortedSequence {
      typedef size_t Elem;
      std::vector<Elem> contents;

   public:
      /// Find the place at which x would normally be inserted into the sequence,
      /// and replace that element with x, returning the index.
      /// If x is already in the sequence, do nothing and return None.
      /// Append to the end if necessary.
      /// Preserve sort order.
      Optional<size_t> replaceNextLargerWith(const Elem x);
      bool empty() const { return contents.empty(); }

      Elem back() const { return contents.back(); }

      size_t size() const { return contents.size(); }

   private:
      /// Return index of first greater element or the end index if there is none.
      size_t locationOfFirstGreaterOrEqualElement(const Elem x) const;
   };

   //==============================================================================
   // MARK: initialization
   //==============================================================================

public:
   SourceComparator(StringRef s1, StringRef s2);

private:
   static std::vector<StringRef> splitIntoLines(StringRef s);

   //==============================================================================
   // MARK: Comparing
   //==============================================================================
public:
   /// Run the actual diff algorithm
   void compare();

private:
   void trimStart();
   void trimEnd();

   /// Return a map from a line in rhs to indices of all identical lines in
   /// rhs
   std::unordered_map<std::string, std::vector<size_t>>
   buildEquivalenceClasses();

   std::pair<std::vector<PRLink>, SortedSequence> buildDAGOfSubsequences(
      std::unordered_map<std::string, std::vector<size_t>> rhsMap);

   void scanMatchedLines(
      std::pair<std::vector<PRLink>, SortedSequence> &&linksAndThres);

   //==============================================================================
   // MARK: summarizing
   //==============================================================================
public:
   std::vector<LineSpan> getMatchingRegions() const;
   std::vector<LineSpan> getMismatchingRegions() const;

   void forEachMatch(function_ref<void(const LineSpan)>) const;
   void forEachMismatch(function_ref<void(const LineSpan)>) const;

   /// A region is zero-origin, semi-open. An SerializableRange is 1-origin,
   /// closed.
   /// This function resolves changed regions to within lines. It's a bit tricky.
   LRSerializableRange convertAMismatch(LineSpan mismatch) const;

   LRRanges convertAllMismatches() const;

private:
   bool areEitherInRange(LineIndices lines) const;

   LineIndices getFirstMatch() const;
   LineIndices getFirstMismatch() const;

   LineIndices getNextMismatchAfter(const LineIndices nextMatch) const;

   LineIndices getNextMatchAfter(const LineIndices nextMismatch) const;

   //==============================================================================
   // MARK: converting mismatches
   //==============================================================================
   CharIndices computeFirstMismatchInLine(LineSpan mismatchLines) const;
   CharIndices
   computeFirstMismatchInLinesOnBothSides(LineSpan mismatchLines) const;
   CharIndices computeFirstMismatchInLineOnOneSide(LineSpan mismatchLines,
                                                   Side) const;

   CharIndices computeEndMismatchInLine(LineSpan mismatchLines,
                                        CharIndices firstMismatchInLine) const;
   CharIndices computeEndMismatchInLineOnBothSides(
      LineSpan mismatchLines, const CharIndices firstMismatchInLine) const;
   CharIndices
   computeEndMismatchInLineOnOneSide(LineSpan mismatchLines,
                                     const CharIndices firstMismatchInLine,
                                     const Side) const;

   size_t computeEndLinesToGoWithChars(LineSpan mismatchLines,
                                       CharIndices endMismatchInLine,
                                       const Side) const;
   //==============================================================================
   // MARK: printing
   //==============================================================================
public:
   void dump() const { print(llvm::errs()); }
   void print(raw_ostream &) const;

   void reportMismatch(const LineSpan mismatch, raw_ostream &) const;

   //==============================================================================
   // MARK: testing
   //==============================================================================
public:
   static bool test();
};

} // namespace incremental_ranges
} // namespace polar

#endif /* POLARPHP_DRIVER_SOURCECOMPARATOR_H */
